package com.wcong.jsoupanno.handler;

import com.wcong.jsoupanno.annotation.JsoupDocment;
import com.wcong.jsoupanno.annotation.JsoupField;
import com.wcong.jsoupanno.states.ExtractTextType;
import com.wcong.jsoupanno.states.ImgType;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.UUID;

/**
 * Core handler (singleton) that fetches an HTML page and fills the fields of an
 * annotated object, or a list of such objects, from it.
 *
 * <p>A class is mapped with {@link JsoupDocment}; its fields are mapped with
 * {@link JsoupField} (value extracted by CSS query) or with {@link JsoupDocment}
 * (value obtained by following a link and crawling the linked page).
 *
 * @author wcong
 * @version 1.0
 * @date 2020-08-06 23:32
 */
public class JsoupHandler {

    /**
     * Timeout in milliseconds applied to page fetches and image downloads.
     */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * The lazily created singleton instance.
     */
    private static volatile JsoupHandler JSOUP_INSTANCE;

    /**
     * Private constructor; instances are obtained through {@link #getInstance()}.
     */
    private JsoupHandler() {

    }

    /**
     * Returns the singleton instance, creating it on first use.
     *
     * @return the shared handler
     */
    public static JsoupHandler getInstance() {
        // Read the volatile field once on the common (already initialised) path.
        JsoupHandler instance = JSOUP_INSTANCE;
        if (instance == null) {
            synchronized (JsoupHandler.class) {
                instance = JSOUP_INSTANCE;
                if (instance == null) {
                    instance = new JsoupHandler();
                    JSOUP_INSTANCE = instance;
                }
            }
        }
        return instance;
    }

    /**
     * Fetches and parses the page at the given URL.
     *
     * <p>A plain fetch is tried first. {@code Jsoup.parse(URL, int)} signals failure by
     * throwing, so the browser-like retry is triggered from the {@code IOException}
     * of the first attempt.
     *
     * @param targetUrl the URL to crawl
     * @return the parsed document
     * @throws Exception if the URL is malformed or both fetch attempts fail; when the
     *                   retry fails, the failure of the first attempt is attached as a
     *                   suppressed exception
     */
    private Document getBodyDocument(String targetUrl) throws Exception {
        // Built outside the try block so a malformed URL fails immediately instead of causing a retry.
        URL pageUrl = new URL(targetUrl);
        try {
            return Jsoup.parse(pageUrl, TIMEOUT_MILLIS);
        } catch (IOException directFailure) {
            try {
                return fetchAsBrowser(targetUrl);
            } catch (IOException retryFailure) {
                retryFailure.addSuppressed(directFailure);
                throw retryFailure;
            }
        }
    }

    /**
     * Fetches a page with a GET request that carries browser-like headers.
     *
     * @param targetUrl the URL to crawl
     * @return the parsed document
     * @throws IOException if the request or the parsing fails
     */
    private Document fetchAsBrowser(String targetUrl) throws IOException {
        return Jsoup.connect(targetUrl)
                .header("Accept", "*/*")
                // "br" is deliberately not advertised: a brotli-compressed body could not be decoded here.
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "zh-CN,zh;q=0.9")
                .header("Cache-Control", "no-cache")
                .header("Connection", "keep-alive")
                .header("Content-Type", "text/plain;charset=UTF-8")
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36")
                .timeout(TIMEOUT_MILLIS)
                .method(Connection.Method.GET)
                .execute()
                .parse();
    }

    /**
     * Crawls the URL declared on the class annotation and fills {@code obj}.
     *
     * <p>The URL is the annotation's {@code targetUrl}; when that is blank the
     * annotation's {@code domain} is used instead.
     *
     * @param obj   the object to fill, or a {@link List} that receives one new
     *              {@code clazz} instance per matched module
     * @param clazz the class carrying the {@link JsoupDocment} annotation
     * @return the list when {@code obj} is a list; otherwise {@code obj}, or
     *         {@code null} when the class selector matches nothing or a field filter rejects the data
     * @throws RuntimeException if {@code clazz} has no {@link JsoupDocment} annotation
     * @throws Exception        if fetching the page or setting a field fails
     */
    public Object execute(Object obj, Class<?> clazz) throws Exception {
        JsoupDocment jsoupDocAno = requireDocumentAnnotation(clazz);
        String url = jsoupDocAno.targetUrl();
        if (isBlank(url)) {
            url = jsoupDocAno.domain();
        }
        return executeHandler(obj, clazz, url, jsoupDocAno.cssQuery());
    }

    /**
     * Crawls the given URL and fills {@code obj}; the URL declared on the class
     * annotation is ignored.
     *
     * @param obj   the object to fill, or a {@link List} that receives one new
     *              {@code clazz} instance per matched module
     * @param clazz the class carrying the {@link JsoupDocment} annotation
     * @param url   the URL to crawl
     * @return the list when {@code obj} is a list; otherwise {@code obj}, or
     *         {@code null} when the class selector matches nothing or a field filter rejects the data
     * @throws RuntimeException if {@code clazz} has no {@link JsoupDocment} annotation
     * @throws Exception        if fetching the page or setting a field fails
     */
    public Object execute(Object obj, Class<?> clazz, String url) throws Exception {
        JsoupDocment jsoupDocAno = requireDocumentAnnotation(clazz);
        return executeHandler(obj, clazz, url, jsoupDocAno.cssQuery());
    }

    /**
     * Returns the {@link JsoupDocment} annotation declared on the class.
     *
     * @param clazz the class to inspect
     * @return the annotation, never {@code null}
     * @throws RuntimeException if the class does not declare the annotation
     */
    private JsoupDocment requireDocumentAnnotation(Class<?> clazz) {
        JsoupDocment jsoupDocAno = clazz.getDeclaredAnnotation(JsoupDocment.class);
        if (null == jsoupDocAno) {
            throw new RuntimeException("该类上不存在JsoupDocment注解");
        }
        return jsoupDocAno;
    }

    /**
     * Fetches the page, selects the module elements and dispatches to the list or
     * single-object handler.
     *
     * @param obj      the object or list to fill
     * @param clazz    the mapped class
     * @param url      the URL to crawl
     * @param cssQuery selector of the module element(s); blank means the whole
     *                 {@code <body>} is the single module
     * @return the filled list, the filled object, or {@code null} for a single
     *         object when nothing matched or a filter rejected it
     * @throws Exception if fetching the page or setting a field fails
     */
    private Object executeHandler(Object obj, Class<?> clazz, String url, String cssQuery) throws Exception {
        Document bodyDocument = getBodyDocument(url);
        // With a blank selector the body itself is the only module, for lists as well as single objects.
        Elements modelElements = isBlank(cssQuery)
                ? new Elements(bodyDocument.body())
                : bodyDocument.select(cssQuery);
        Field[] fields = clazz.getDeclaredFields();
        if (obj instanceof List) {
            // The caller supplies an untyped list that is meant to hold clazz instances.
            @SuppressWarnings("unchecked")
            List<Object> target = (List<Object>) obj;
            return getListHandler(target, clazz, modelElements, fields);
        }
        if (modelElements.isEmpty()) {
            // The selector matched nothing, so there is no data for a single object.
            return null;
        }
        return getPojoHandler(obj, modelElements.get(0), fields);
    }

    /**
     * Fills a single object from one module element.
     *
     * @param obj          the object to fill
     * @param modelElement the element the field selectors are applied to
     * @param fields       the declared fields of the object's class
     * @return {@code obj}, or {@code null} when a field filter rejected the data
     * @throws Exception if setting a field fails
     */
    private Object getPojoHandler(Object obj, Element modelElement, Field[] fields) throws Exception {
        return setFieldValue(obj, fields, modelElement) ? obj : null;
    }

    /**
     * Creates one instance of {@code clazz} per module element and appends every
     * instance that passes its field filters to the list.
     *
     * @param obj           the list to append to
     * @param clazz         the element class; instantiated through its no-argument constructor
     * @param modelElements the module elements
     * @param fields        the declared fields of {@code clazz}
     * @return the same list that was passed in
     * @throws Exception if instantiation or setting a field fails
     */
    private Object getListHandler(List<Object> obj, Class<?> clazz, Elements modelElements, Field[] fields) throws Exception {
        for (Element modelElement : modelElements) {
            Object item = clazz.getDeclaredConstructor().newInstance();
            if (setFieldValue(item, fields, modelElement)) {
                obj.add(item);
            }
        }
        return obj;
    }

    /**
     * Assigns a value to every annotated field of {@code obj}.
     *
     * <p>Fields are processed in the given order. A field annotated with
     * {@link JsoupDocment} is filled by crawling a linked page; a field annotated
     * with {@link JsoupField} is filled from the module element. Fields without
     * either annotation are left untouched. Processing stops at the first field
     * whose filter rejects the value; that field and the ones before it have
     * already been assigned at that point.
     *
     * @param obj          the object to fill
     * @param fields       the fields to process
     * @param modelElement the element the field selectors are applied to
     * @return {@code true} if the data passed all filters, {@code false} otherwise
     * @throws NumberFormatException if a field marked {@code isInteger} yields a non-numeric value
     * @throws Exception             if setting a field or crawling a linked page fails
     */
    private boolean setFieldValue(Object obj, Field[] fields, Element modelElement) throws Exception {
        for (Field field : fields) {
            // A field carrying JsoupDocment is filled from a linked page.
            JsoupDocment docmentAnno = field.getDeclaredAnnotation(JsoupDocment.class);
            if (null != docmentAnno) {
                field.setAccessible(true);
                repeatExecute(obj, modelElement, field, docmentAnno);
                continue;
            }
            JsoupField jsoupFieldAno = field.getDeclaredAnnotation(JsoupField.class);
            if (null == jsoupFieldAno) {
                continue;
            }
            // Only annotated fields are opened up for reflective writes.
            field.setAccessible(true);
            Elements fieldElement = modelElement.select(jsoupFieldAno.cssQuery());
            String res = extractValue(fieldElement, jsoupFieldAno);
            if (jsoupFieldAno.isInteger()) {
                int resInt = parseIntValue(field, res);
                field.set(obj, resInt);
                if (!filterIntHandler(resInt, jsoupFieldAno)) {
                    return false;
                }
            } else {
                field.set(obj, res);
                if (!filterStrHandler(res, jsoupFieldAno)) {
                    return false;
                }
            }
            // Optionally download the image referenced by the configured attribute.
            if (isNotBlank(jsoupFieldAno.imgSaveAttr())) {
                String imgUrl = fieldElement.attr(jsoupFieldAno.imgSaveAttr());
                saveImgHandler(imgUrl, jsoupFieldAno.imgSavePath(), jsoupFieldAno.imgFormat());
            }
        }
        return true;
    }

    /**
     * Extracts the raw string value for a field from the matched elements.
     *
     * <p>The configured attribute is read when one is given; otherwise the text or
     * the HTML is read depending on the annotation's text type. Afterwards the
     * configured replacement, if any, is applied.
     *
     * @param fieldElement  the elements matched by the field's selector
     * @param jsoupFieldAno the field's annotation
     * @return the extracted value
     */
    private String extractValue(Elements fieldElement, JsoupField jsoupFieldAno) {
        String res;
        if (isNotBlank(jsoupFieldAno.attr())) {
            res = fieldElement.attr(jsoupFieldAno.attr());
        } else if (jsoupFieldAno.textType() == ExtractTextType.TEXT) {
            res = fieldElement.text();
        } else {
            res = fieldElement.html();
        }
        String repStr = jsoupFieldAno.replaceTarget();
        if (isNotBlank(repStr)) {
            res = res.replace(repStr, jsoupFieldAno.replaceRes());
        }
        return res;
    }

    /**
     * Parses an extracted value as an {@code int}, ignoring surrounding whitespace.
     *
     * @param field the field the value is destined for; used only for the error message
     * @param raw   the extracted value
     * @return the parsed number
     * @throws NumberFormatException if the trimmed value is not a valid {@code int}
     */
    private int parseIntValue(Field field, String raw) {
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            NumberFormatException detailed = new NumberFormatException(
                    "Field '" + field.getName() + "' is marked isInteger but the extracted value is not an int: \"" + raw + "\"");
            detailed.initCause(e);
            throw detailed;
        }
    }

    /**
     * Applies the numeric filter configured on the annotation.
     *
     * <p>A filter value of {@code -1} means "not configured". Only the first
     * configured filter is evaluated, in the order greater-than, less-than, equals.
     *
     * @param resInt        the value to test
     * @param jsoupFieldAno the field's annotation
     * @return {@code true} if the value passes or no filter is configured
     */
    private boolean filterIntHandler(int resInt, JsoupField jsoupFieldAno) {
        if (jsoupFieldAno.filterIntByGt() != -1) {
            return resInt > jsoupFieldAno.filterIntByGt();
        } else if (jsoupFieldAno.filterIntByLt() != -1) {
            return resInt < jsoupFieldAno.filterIntByLt();
        } else if (jsoupFieldAno.filterIntByEq() != -1) {
            return resInt == jsoupFieldAno.filterIntByEq();
        } else {
            // No filter configured: nothing to reject.
            return true;
        }
    }

    /**
     * Applies the string filter configured on the annotation.
     *
     * <p>Only the first configured filter is evaluated, in the order contains, equals.
     *
     * @param resource      the value to test
     * @param jsoupFieldAno the field's annotation
     * @return {@code true} if the value passes or no filter is configured
     */
    private boolean filterStrHandler(String resource, JsoupField jsoupFieldAno) {
        if (isNotBlank(jsoupFieldAno.filterStrByContain())) {
            return resource.contains(jsoupFieldAno.filterStrByContain());
        } else if (isNotBlank(jsoupFieldAno.filterStrByEquals())) {
            return resource.equals(jsoupFieldAno.filterStrByEquals());
        } else {
            // No filter configured: nothing to reject.
            return true;
        }
    }

    /**
     * Fills a nested field by following a link and crawling the linked page.
     *
     * <p>The link is taken, in order of precedence, from another field of
     * {@code obj} named by {@code subHrefByField}, from the element selected by
     * {@code subHrefByCss}, or from the {@code a} elements inside the module. When
     * the link comes from another field, that field must already have been
     * assigned, i.e. be processed before this one. If no link is found the field is
     * left untouched.
     *
     * @param obj          the object that owns the field
     * @param modelElement the module element of the owning object
     * @param field        the nested field; its type must carry {@link JsoupDocment}
     *                     and have a no-argument constructor
     * @param docmentAnno  the annotation declared on the field
     * @throws Exception if the referenced field does not exist, or instantiating or
     *                   crawling the nested object fails
     */
    private void repeatExecute(Object obj, Element modelElement, Field field, JsoupDocment docmentAnno) throws Exception {
        String href;
        if (isNotBlank(docmentAnno.subHrefByField())) {
            // Read the link from another field of the same object.
            Field hrefField = obj.getClass().getDeclaredField(docmentAnno.subHrefByField());
            hrefField.setAccessible(true);
            href = (String) hrefField.get(obj);
        } else if (isNotBlank(docmentAnno.subHrefByCss())) {
            href = modelElement.select(docmentAnno.subHrefByCss()).attr("href");
        } else {
            // Default: the href of the anchors inside the module.
            href = modelElement.select("a").attr("href");
        }
        if (isBlank(href)) {
            // Nothing to follow; crawling the bare base URL would fill the field with unrelated data.
            return;
        }
        // urlPrefix takes precedence over domain as the base for relative links.
        String baseUrl = isNotBlank(docmentAnno.urlPrefix()) ? docmentAnno.urlPrefix() : docmentAnno.domain();
        String targetUrl = resolveUrl(baseUrl, href.trim());
        Class<?> clazz = field.getType();
        Object subObj = clazz.getDeclaredConstructor().newInstance();
        field.set(obj, execute(subObj, clazz, targetUrl));
    }

    /**
     * Turns a link into an absolute URL.
     *
     * <p>Links starting with {@code http} are returned unchanged. Protocol-relative
     * links ({@code //host/path}) get the scheme of the base URL, defaulting to
     * {@code http}. Any other link is appended to the base URL with exactly one
     * slash between the two parts.
     *
     * @param baseUrl the base URL for relative links
     * @param href    the non-blank, trimmed link
     * @return the URL to crawl
     */
    private String resolveUrl(String baseUrl, String href) {
        if (href.startsWith("http")) {
            return href;
        }
        String base = baseUrl == null ? "" : baseUrl.trim();
        if (href.startsWith("//")) {
            return (base.startsWith("https") ? "https:" : "http:") + href;
        }
        int baseEnd = base.length();
        while (baseEnd > 0 && base.charAt(baseEnd - 1) == '/') {
            baseEnd--;
        }
        int pathStart = 0;
        while (pathStart < href.length() && href.charAt(pathStart) == '/') {
            pathStart++;
        }
        return base.substring(0, baseEnd) + "/" + href.substring(pathStart);
    }

    /**
     * Downloads an image into a directory under a random file name.
     *
     * <p>This is best effort: a failure is reported on standard error and does not
     * interrupt the crawl. A blank URL is ignored.
     *
     * @param url     the image URL; {@code http:} is prepended when it does not start with {@code http}
     * @param dir     the target directory, created if missing; blank means the
     *                {@code user.dir} system property
     * @param imgType supplies the suffix appended to the generated file name
     */
    private void saveImgHandler(String url, String dir, ImgType imgType) {
        if (isBlank(url)) {
            return;
        }
        String imageUrl = url.startsWith("http") ? url : "http:" + url;
        String dirPath = isNotBlank(dir) ? dir : System.getProperty("user.dir");
        try {
            File targetDir = new File(dirPath);
            // The second isDirectory() check covers a directory created concurrently by someone else.
            if (!targetDir.isDirectory() && !targetDir.mkdirs() && !targetDir.isDirectory()) {
                throw new IOException("Cannot create image directory: " + targetDir);
            }
            File targetFile = new File(targetDir, UUID.randomUUID().toString() + imgType.getTextName());
            URLConnection connection = new URL(imageUrl).openConnection();
            // Without timeouts a stalled server would block the crawl indefinitely.
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            // The input stream is opened first so that no file is created when the download cannot start.
            try (InputStream is = connection.getInputStream();
                 FileOutputStream fos = new FileOutputStream(targetFile)) {
                byte[] buffer = new byte[8192];
                int len;
                while ((len = is.read(buffer)) != -1) {
                    fos.write(buffer, 0, len);
                }
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("Failed to save image from " + imageUrl);
            e.printStackTrace();
        }
    }

    /**
     * Tells whether a character sequence contains at least one non-whitespace character.
     *
     * @param cs the sequence to test, may be {@code null}
     * @return {@code true} if the sequence is not blank
     */
    private boolean isNotBlank(CharSequence cs) {
        return !isBlank(cs);
    }

    /**
     * Tells whether a character sequence is {@code null}, empty or whitespace only.
     *
     * @param cs the sequence to test, may be {@code null}
     * @return {@code true} if the sequence is blank
     */
    private boolean isBlank(CharSequence cs) {
        int strLen = length(cs);
        for (int i = 0; i < strLen; ++i) {
            if (!Character.isWhitespace(cs.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the length of a character sequence, treating {@code null} as empty.
     *
     * @param cs the sequence, may be {@code null}
     * @return the length, or {@code 0} for {@code null}
     */
    private int length(CharSequence cs) {
        return cs == null ? 0 : cs.length();
    }

}
